/* ******************************************************************************************************* */
/* ***************** Constructs a dataset of firm characteristics before shareholder meetings  *************/
/* ******************************************************************************************************* */

/* ********************************************************************************** */
/* Step 6.1: Start from Compustat (temp.cst_matched is firm-meeting date panel data); */
/* ********************************************************************************** */
proc sort data=comp.funda (keep=gvkey cusip conm datadate datafmt indfmt popsrc consol fyear fyr sich naicsh fic xad cik act lct epspx emp
  che at dltt lt pstkl pstk sale oibdp oiadp dpc dp ib dvc csho dlc txditc cogs xrd pstkrv ceq txdb dcvt lt xsga prcc_f ppent xint txt capx aqc curcd) out=cst nodupkey;
  where at > 0 and sale >= 0 and 2019 >= fyear >= 2001 and indfmt= 'INDL' and datafmt= 'STD' and popsrc= 'D' and consol= 'C' and curcd = 'USD'; 
  * retrieve accounting information as early as 1960 to calculate cash flow volatility over the past 20 years;
  by gvkey fyear;
run;

data cst; set cst;
  if fyr gt 0;
  cyear = fyear;
  if fyr le 5 and fyr gt 0 then cyear = cyear+1;
  fyrenddt = intnx('month', mdy(fyr, 28, cyear), 0, 'end');
  format fyrenddt mmddyyn8.;
  drop indfmt datafmt popsrc consol curcd;

proc sql;
  create table cst_zip as
  select a.*, b.addzip, b.incorp, b.city as city_comp, b.state as state_comp, b.loc as country
  from cst as a left join comp.company as b
  on a.gvkey = b.gvkey;
quit;

data cst_zip; set cst_zip;
  zipcode = substr(addzip, 1, 5)*1;
  zipcode4 = int(zipcode/10);

proc sql;
  create table cst_zip3 as
  select a.*, b.longitude, b.latitude, b.city, b.state
  from cst_zip as a left join comp.zipcode as b
  on a.zipcode = b.zip;
quit;

proc sql;
  create table cstrating as
  select a.*, b.splticrm
  from cst_zip3 as a left join comp.adsprate as b
  on a.gvkey = b.gvkey and intck('month', b.datadate, a.fyrenddt) in (0);
quit;

proc sql;
  create table cst_matched as 
  select a.*, b.lpermno as permno, b.lpermco as permco, b.linktype, b.linkprim, b.liid
  from cstrating as a, crspcc.ccmxpf_linktable as b
  where a.gvkey = b.gvkey and linktype in ("LC", "LN", "LU", "LX", "LD", "LS") and
        usedflag in (1) and (fyrenddt >= linkdt or linkdt = .B) and (fyrenddt <= linkenddt or linkenddt = .E)
  order by gvkey, fyrenddt, lpermno, lpermco, linkdt, linkenddt; 
quit; 

proc sort data = cst_matched; by gvkey permco permno fyear linktype descending linkprim liid;

data cst_matched1; set cst_matched; by gvkey permco permno fyear linktype descending linkprim liid;
  if first.fyear;
run;

/* *************************************************************************** */
/* Step 6.2: Merge with CRSP (temp.cst_matched2 has firm-level panel data); */
/* *************************************************************************** */

****************************************************************************************************************
***************************   Construct a firm-meeting date panel for all firms  ******************************* 
****************************************************************************************************************;
data stock_list; set temp.meetings_car_permno;
  keep permno meetingdate;

proc sort nodupkey; by permno meetingdate; where permno ne .;
 
proc sort data=crsp.stocknames (keep= permno permco comnam namedt nameenddt shrcd ticker exchcd siccd ncusip) out = permnolist nodupkey;
  by permno namedt nameenddt;
run;

proc sql undo_policy = none;
	create table cst_list as select distinct
	a.*, b.fyrenddt, b.gvkey
	from stock_list a, cst_matched1 b
	where (a.permno = b.permno) and 3 <= intck('month', b.fyrenddt, a.meetingdate) <= 18;

	* add PERMNO - note more than one PERMNO can map to a PERMCO;
	create table cst_list as select distinct
	a.*, b.permco, b.siccd, b.shrcd, b.exchcd, b.ticker, b.comnam, b.ncusip
	from cst_list a left join permnolist b
	on (a.permno = b.permno) and (namedt <= a.meetingdate <= nameenddt); 

	* add market cap, shares outstanding, number of share class at fyrenddt;
	create table cst_list as select distinct
	a.*, (abs(b.prc)*b.shrout/1000) as mve_crsp_permno, abs(b.prc) as prc_crsp_permno, b.shrout as shrout_permno, b.date as mve_date 
	from cst_list a left join crsp.msf b
	on (a.permno = b.permno) and intck('month', b.date, a.meetingdate) = 1;

	* merge with CRSP;
	create table cst_matched1b as select 
	*
	from cst_list as a left join cst_matched1 as b
	on a.fyrenddt = b.fyrenddt and a.permno =  b.permno; 
quit;

proc sort data = cst_list nodupkey; by gvkey meetingdate permco permno fyrenddt; **** DUAL-class firms show up as two observations in this dataset;

data cst_list; set cst_list; by gvkey meetingdate permco permno fyrenddt;
  if last.permno; 

proc means data = cst_list noprint; by gvkey meetingdate permco; 
  var mve_crsp_permno shrout_permno; 
  output out = cst_list2 (drop=_type_ _freq_) sum = mve_crsp shrout;
run;

proc sql;
  create table cst_matched2 as select 
  *
  from cst_matched1b a, cst_list2 b
  where a.gvkey = b.gvkey and a.meetingdate = b.meetingdate and a.permco = b.permco; *  217,436 OBS;
quit;



data cst_matched2a; set cst_matched2;
  if permco ne .;
  fyme = prcc_f*csho;
  pct_diff = mve_crsp/fyme - 1;
  fyme2 = fyme;
  if fyme < mve_crsp then fyme2 = mve_crsp;
  ratio = csho*1000/shrout;

  name_dis = min(spedis(conm, comnam), spedis(comnam, conm));

proc sort data = cst_matched2a nodupkey; by gvkey permco meetingdate permno; run;

proc sql;
  create table cst_matched2a as
  select a.*, b.ceq as ceq_lag, b.at as at_lag, b.sale as sale_lag, b.fyrenddt as fyrenddt_lag, abs(intck('month', b.fyrenddt, a.fyrenddt)-12) as diff
  from cst_matched2a as a left join cst_matched2a as b
  on a.gvkey = b.gvkey and a.permco = b.permco and a.permno = b.permno and a.fyear = b.fyear+1;
run;

proc sort data = cst_matched2a nodupkey; by gvkey permco meetingdate permno diff; 

data cst_matched2a; set cst_matched2a; by gvkey permco meetingdate permno diff; 
  if first.permno;

proc means data = cst_matched2a n mean median p25 p75 min max;
  var fyme mve_crsp pct_diff fyear;
run;


proc sql;
  create table cst_matched2b as
  select a.*, input(a.gvkey, best8.0) as gvkey_num, b.ff48 as ind, b.ff48ind
  from cst_matched2a as a left join root.ff48 as b
  on ff1 <= siccd <= ff2;
quit;

proc sql;
  create table cst_matched2c as
  select a.*, b.start as sp500_start, b.ending as sp500_ending
  from cst_matched2b as a left join crspix.msp500list as b
  on a.permno = b.permno and b.start <= a.meetingdate <= b.ending;
quit;

* calculate # of business segments and geographic segments;
proc sql;
  create table busseg as
  select a.permco, a.gvkey, a.meetingdate, a.fyear, b.sid, b.sales as seg_sale
  from cst_matched2c as a, seghist.seg_annfund as b
  where a.gvkey = b.gvkey and a.datadate = b.datadate and b.stype = 'BUSSEG';
quit;


proc sort data = busseg; by permco gvkey meetingdate; 

proc means data = busseg noprint; by permco gvkey meetingdate;
  var seg_sale;
  output out = temp_busseg (drop=_type_ _freq_) n = num_busseg sum = total_sale;

data temp; merge busseg temp_busseg; by permco gvkey meetingdate;
  if total_sale > 0 then frac2 = (seg_sale/total_sale)*(seg_sale/total_sale);

proc means noprint; by permco gvkey meetingdate;
  var frac2;
  output out = temp_busseg2 (drop=_type_ _freq_) sum = hhi;

proc sql;
  create table geoseg as
  select a.permco, a.gvkey, a.meetingdate, a.fyear, b.sid, b.sales as seg_sale
  from cst_matched2c as a, seghist.seg_annfund as b
  where a.gvkey = b.gvkey and a.datadate = b.datadate and b.stype = 'GEOSEG';
quit;

proc sort data = geoseg; by permco gvkey meetingdate; 

proc means data = geoseg noprint; by permco gvkey meetingdate;
  var seg_sale;
  output out = temp_geoseg (drop=_type_ _freq_) n = num_geoseg;

proc sort data = cst_matched2c; by permco gvkey meetingdate;

data cst_matched2d; merge cst_matched2c (in=a) temp_busseg temp_geoseg temp_busseg2; by permco gvkey meetingdate;
  if siccd not in (0, .) and ind = . then ind = 48;
  cash = che/at; 
  if siccd not in (0, .) then do; 
    ind2 = int(siccd/100);
    ind3 = int(siccd/10);
  end;

  if sp500_ending ne . then sp500 = 1; else sp500 = 0;

  if dltt <= .Z then dltt = 0;
  if capx <= .Z then capx = 0;
  if fyme >0 and dltt >= 0 then capital = log(fyme+dltt);
  if fyme < 0 then capital = log(fyme2+dltt);

  if pstkl >= 0 then prf = pstkl; else if pstkrv >= 0 then prf = pstkrv; else if pstk >= 0 then prf = pstk;
  if splticrm not in ('', 'N.M.', 'SD', 'Suspended') then rating = 1; else rating = 0;
  if epspx not in (0, .) then pe = prcc_f/epspx;
  if txditc <= .Z then txditc = 0;
  debt = lt+prf-txditc-dcvt;
  debt2 = dltt+dlc;
  ebitda_ta = oibdp/at;
  mv = lt-txditc+prf+fyme;
  if act <= .Z then act = 0;
  if lct <= .Z then lct = 0;
  liquidity = (act-lct)/at;
  de = dltt/ceq;
  mb = fyme/ceq;
  bm = ceq/fyme;
  q1 = (at+fyme-ceq)/at;
  q2= (at+fyme2-ceq)/at;
  q3= (lt-txditc+prf+fyme)/at;
  q4= (lt-txditc+prf+fyme2)/at;
  q5 = (at+mve_crsp-ceq-txdb)/at;
  acquisition = aqc/at;
  tangible = ppent/at;
  capex = capx/at;
  size = log(at);
  sales = log(1+sale);
  be = at-lt+txditc-prf;
  leverage_b = (at-be)/at; 
  leverage_m = (at-be)/mv;
  leverage_bnet = (at-be-che)/at; 
  leverage_mnet = (at-be-che)/mv;
  yield = dvc/fyme;
  yield2 = dvc/fyme2;

  if dvc > 0 then div = 1; else div = 0;
  cashflow_at = (ib+dp)/at;
  cashflow = ib+dp;
  if xint <= .Z then xint = 0;
  freecashflow_at = (oibdp-xint-txt-capx)/at;
  *	IB - Income Before Extraordinary Items;
  * DP - Depreciation and Amortization;
  if xrd > .Z then do; 
    rnd = xrd/at; rnd_sale = xrd/sale; rnd_dummy = 0; end;
    else do; 
      rnd = 0; rnd_sale = 0; rnd_dummy = 1; end;
  invest = capex+rnd;
  if oibdp = . and oiadp+dpc ne . then oibdp = oiadp+dpc;
  roa = oibdp/at_lag;
  roa2 = oibdp/at;
  roa3 = ib/at_lag;
  roa4 = ib/at;
  if num_geoseg in (., 0) then num_geoseg = 1;
  if num_busseg in (., 0) then num_busseg = 1;
  if xad > .Z then do; 
    adv = xad/at; adv_sale = xad/sale; adv_dummy = 0; end;
    else do; 
      adv = 0; adv_sale =0; adv_dummy = 1; end;
  logme = log(fyme);
  logme2 = log(fyme2);
  if fyme > 0 and dltt >= 0 then capital = log(fyme+dltt);
  if capital = . then capital = logme2;

proc sql;
  create table cst_matched2d as
  select a.*, b.roa as roa_post, b.roa2 as roa2_post, b.roa3 as roa3_post, b.roa4 as roa4_post, b.fyrenddt as fyrenddt_post, abs(intck('month', b.fyrenddt, a.fyrenddt)+12) as diff
  from cst_matched2d as a left join cst_matched2d as b
  on a.gvkey = b.gvkey and a.permco = b.permco and a.permno = b.permno and a.fyear = b.fyear-1;
run;

proc sort data = cst_matched2d nodupkey; by gvkey permco meetingdate permno diff; 

data cst_matched2d; set cst_matched2d; by gvkey permco meetingdate permno diff; 
  if first.permno;

proc sort data = cst_matched2d nodupkey; by gvkey permno meetingdate;

proc sort data = cst_matched2d; by descending q5;
run;



************************************************************************************************;
********************************** Calculate prior stock returns *******************************;
************************************************************************************************;
*** Industry-adjusted stock returns;
data stocknames; set crsp.stocknames;
  where siccd ne . and exchcd in (1, 2, 3); 
  keep permno namedt nameenddt siccd;

proc sql;
  create table stocknames as
  select a.*, b.ff48, b.ff48ind
  from stocknames as a left join root.ff48 as b
  on ff1 <= siccd <= ff2;
quit;

proc sql;
  create table crsp_ret as 
  select a.*, b.ret, b.date, abs(b.prc)*b.shrout as me
  from stocknames as a, crsp.msf as b
  where a.permno = b.permno and a.namedt <= b.date <= a.nameenddt and year(b.date) >= 2000;
quit;

proc sql;
  create table crsp_ret as
  select a.*, b.fyrenddt, b.at, b.sale
  from crsp_ret as a, cst_matched1 as b
  where a.permno = b.permno and 3 <= intck('month', b.fyrenddt, a.date) <= 18;
quit; * cst_matched1 requires that the firm have at > 0 and sale >= 0;

proc sort; by permno descending date fyrenddt; 

data crsp_ret; set crsp_ret; by permno descending date;
  if last.date;

data crsp_ret2; set crsp_ret; by permno descending date; 
  set crsp_ret (firstobs=2 keep=permno date me rename=(permno=permno2 date=date2 me=me_lag));
  if permno ne permno2 or intck('month', date2, date) not in (1) then me_lag = .;
  sic2 = int(siccd/100);
  sic3 = int(siccd/10);
  drop permno2 date2;
run;

proc sort data = crsp_ret2; by siccd date;

proc means data = crsp_ret2 noprint; by siccd date;
  var ret; weight me_lag; where me_lag ne .;
  output out = ret_siccd (drop=_type_ _freq_) mean = ret_siccd;

proc means data = crsp_ret2 noprint; by siccd date;
  var ret; where me_lag ne .;
  output out = ret_siccd_ (drop=_type_ _freq_) median = ret_siccd_med;

data ret_siccd; merge ret_siccd ret_siccd_; by siccd date;

proc sort data = crsp_ret2; by sic2 date;

proc means data = crsp_ret2 noprint; by sic2 date;
  var ret; weight me_lag; where me_lag ne .;
  output out = ret_sic2 (drop=_type_ _freq_) mean = ret_sic2;

proc means data = crsp_ret2 noprint; by sic2 date;
  var ret; where me_lag ne .;
  output out = ret_sic2_ (drop=_type_ _freq_) median = ret_sic2_med;

data ret_sic2; merge ret_sic2 ret_sic2_; by sic2 date;

proc sort data = crsp_ret2; by ff48 date;

proc means data = crsp_ret2 noprint; by ff48 date;
  var ret; weight me_lag; where me_lag ne .;
  output out = ret_ff48 (drop=_type_ _freq_) mean = ret_ff48;

proc means data = crsp_ret2 noprint; by ff48 date;
  var ret; where me_lag ne .;
  output out = ret_ff48_ (drop=_type_ _freq_) median = ret_ff48_med;

data ret_ff48; merge ret_ff48 ret_ff48_; by ff48 date;
run;

**** Before MEETING date ****;
proc sort data = cst_matched2d (keep=permno siccd ind meetingdate) out = ret nodupkey; by permno meetingdate; run;

proc sql;
  create table ret1 as
  select a.*, int(siccd/100) as sic2, b.ret, b.date as datem
  from ret as a, crsp.msf as b
  where a.permno = b.permno and 1 <= intck('month', b.date, a.meetingdate) <= 36;
quit;

proc sql;
  create table ret2
  as select a.*, b.vwretd, a.ret-b.vwretd as xret
  from ret1 as a, crsp.msi as b
  where a.datem = b.date;
quit;

proc sql;
  create table ret2
  as select a.*, a.ret-b.ret_siccd as xret_siccd, b.ret_siccd_med
  from ret2 as a left join ret_siccd as b
  on a.siccd = b.siccd and a.datem = b.date;
quit;

proc sql;
  create table ret2
  as select a.*, a.ret-b.ret_sic2 as xret_sic2
  from ret2 as a left join ret_sic2 as b
  on a.sic2 = b.sic2 and a.datem = b.date;
quit;

proc sql;
  create table ret2
  as select a.*, a.ret-b.ret_ff48 as xret_ff48
  from ret2 as a left join ret_ff48 as b
  on a.ind = b.ff48 and a.datem = b.date;
quit;

proc sort data = ret2; by permno descending meetingdate;

proc means data = ret2 noprint; by permno descending meetingdate;
  var ret xret vwretd xret_siccd xret_sic2 xret_ff48 ret_siccd_med; 
  output out = xret3y (drop=_type_ _freq_) sum = ret3y xret3y mret3y xret_siccd_3y xret_sic2_3y xret_ff48_3y ret_siccd_med_3y std(ret) = vol3y std(xret) = ivol3y n(xret) = num_months36;

proc means data = ret2 noprint; by permno descending meetingdate;
  var ret xret vwretd xret_siccd xret_sic2 xret_ff48 ret_siccd_med; where intck('month', datem, meetingdate) <= 24;
  output out = xret2y (drop=_type_ _freq_) sum = ret2y xret2y mret2y xret_siccd_2y xret_sic2_2y xret_ff48_2y ret_siccd_med_2y std(ret) = vol2y std(xret) = ivol2y n(xret) = num_months24;

proc means data = ret2 noprint; by permno descending meetingdate;
  var ret xret vwretd xret_siccd xret_sic2 xret_ff48 ret_siccd_med; where intck('month', datem, meetingdate) <= 12;
  output out = xret1y (drop=_type_ _freq_) sum = ret1y xret1y mret1y xret_siccd_1y xret_sic2_1y xret_ff48_1y ret_siccd_med_1y std(ret) = vol1y std(xret) = ivol1y n(xret) = num_months12;

data temp_cst_return; merge xret3y xret1y xret2y; by permno descending meetingdate;
  if ret3y >= ret_siccd_med_3y > . then ret3y_lt_siccd = 0; else 
  if ret_siccd_med_3y > ret3y > . then ret3y_lt_siccd = 1;
  if ret1y >= ret_siccd_med_1y > . then ret1y_lt_siccd = 0; else 
  if ret_siccd_med_1y > ret1y > . then ret1y_lt_siccd = 1;
run;

proc sql;
  create table cst_matched2x as
  select a.*, b.*
  from cst_matched2d as a left join temp_cst_return as b
  on a.permno = b.permno and a.meetingdate = b.meetingdate;
quit;

**** Before FISCAL YEAR-END date ****;
proc sort data = cst_matched2d (keep=permno datadate) out = ret nodupkey; by permno datadate;

proc sql;
  create table ret1 as
  select a.*, b.ret, b.date as datem
  from ret as a, crsp.msf as b
  where a.permno = b.permno and 0 <= intck('month', b.date, a.datadate) <= 35;
quit;

proc sql;
  create table ret2
  as select a.*, b.vwretd, a.ret-b.vwretd as xret
  from ret1 as a, crsp.msi as b
  where a.datem = b.date;
quit;

proc sort data = ret2; by permno descending datadate;

proc means data = ret2 noprint; by permno descending datadate;
  var ret xret vwretd; 
  output out = xret3y_f (drop=_type_ _freq_) sum = ret3y_f xret3y_f mret3y_f;

proc means data = ret2 noprint; by permno descending datadate;
  var ret xret vwretd; where intck('month', datem, datadate) <= 24;
  output out = xret2y_f (drop=_type_ _freq_) sum = ret2y_f xret2y_f mret2y_f;

proc means data = ret2 noprint; by permno descending datadate;
  var ret xret vwretd; where intck('month', datem, datadate) <= 12;
  output out = xret1y_f (drop=_type_ _freq_) sum = ret1y_f xret1y_f mret1y_f;

data temp_cst_return_f; merge xret3y_f xret1y_f xret2y_f; by permno descending datadate;
run;

proc sql;
  create table cst_matched2x as
  select a.*, b.*
  from cst_matched2x as a left join temp_cst_return_f as b
  on a.permno = b.permno and a.datadate = b.datadate;
quit;

proc freq; tables fyear; run;



************************************************************************************************;
***********************************   ANALYSTS  ************************************************;
************************************************************************************************;
proc sql;
  create table cst_link as
  select a.gvkey, a.permco, a.permno, a.meetingdate, b.ticker, b.score
  from cst_matched2d as a left join ibes.iclink as b
  on a.permno = b.permno and b.score in (0,1,2);
quit;

proc sort data = cst_link; by gvkey permco permno meetingdate score;

data temp_cst_link; set cst_link; by gvkey permco permno meetingdate score;
  if first.meetingdate;
run;

* Using detail file;
proc sql;
  create table analysts as
  select a.*, b.usfirm, b.cusip, value, anndats, fpedats, estimator, analys
  from temp_cst_link as a left join ibes.Detu_epsus as b
  on a.ticker = b.ticker and 1 <= intck('month', b.fpedats, a.meetingdate) <= 12 and FPI='1' and measure="EPS" 
    and b.usfirm = 1 and 1 <= intck('day', b.anndats, b.fpedats) <= 365;
quit;

proc sort data = analysts; by gvkey permco meetingdate permno fpedats estimator analys anndats; where value ne .;

data analysts; set analysts; by gvkey permco meetingdate permno fpedats estimator analys anndats; 
  if last.analys;
  if weekday(anndats) = 1 then anndats2 = anndats-2; else
  if weekday(anndats) = 7 then anndats2 = anndats-1; else
  anndats2 = anndats;

  if weekday(fpedats) = 1 then fpedats2 = fpedats-2; else
  if weekday(fpedats) = 7 then fpedats2 = fpedats-1; else
  fpedats2 = fpedats;

* get CRSP Cumulative adjustment factor as of report date of the estimate;
proc sql;
  create table analysts1 as
  select a.*, b.date as permno_date_est, b.cfacshr as crspadj_est, b.prc as prc_est
  from analysts as a left join crsp.dsf (keep=permno cfacshr date prc) as b
  on a.permno = b.permno and a.anndats2 = b.date;
quit;

proc sql;
  create table analysts2 as
  select a.*, b.date as permno_date_rep, b.cfacshr as crspadj_rep, b.prc as prc_rep
  from analysts1 as a left join crsp.dsf (keep=permno cfacshr date prc) as b
  on a.permno = b.permno and a.fpedats2 = b.date;
quit;

data analysts2; set analysts2; 
  value_adj = value/(crspadj_est/crspadj_rep);

proc sort data = analysts2; by gvkey permco meetingdate permno fpedats;

proc means data = analysts2 noprint; by gvkey permco meetingdate permno fpedats;
  var value value_adj;
  output out = analysts2 n(value_adj) = nanalyst std(value) = dispersionu std(value_adj) = dispersion;

data temp_analysts; set analysts2; by gvkey permco meetingdate permno fpedats;
  if last.permno;

data x; set temp_analysts; by gvkey permco meetingdate;
  if not (first.meetingdate and last.meetingdate); *** DUAL-class firms;

proc means data = temp_analysts n mean median p25 p75;
  var nanalyst dispersion dispersionu;
run;

************************************************************************************************;
*********************************** END OF ANALYSTS  *******************************************;
************************************************************************************************;


proc sql;
  create table cst_matched2x as
  select a.*, b.nanalyst, b.dispersion
  from cst_matched2x as a left join temp_analysts as b
  on a.permno = b.permno and a.meetingdate = b.meetingdate;
quit;

data cst_matched2x; set cst_matched2x;
  if nanalyst = . and fyear > 1985 then nanalyst = 0;

* firm age;
proc sort data = crsp.msf (keep=permno date) out = msf; by permno date;

data firm_age; set msf; by permno date;
  if first.permno;
  first_date_crsp = date;
  drop date;
  format first_date_crsp date9.;
run;

proc sort data = cst_matched2x; by permno;

data cst_matched2b; merge cst_matched2x (in=a) firm_age; by permno; if a;
  firmage = intck('year', first_date_crsp, meetingdate);
run;

proc sort data = cst_matched2b; by sich fyear;

proc means data = cst_matched2b noprint; by sich fyear;
  var ret1y ret1y_f ret2y ret2y_f roa roa2 roa3 roa4 roa_post roa2_post roa3_post roa4_post;
  output out = ind_adj (drop=_type_ _freq_) median = ret1y_ind ret1y_f_ind ret2y_ind ret2y_f_ind roa_ind roa2_ind roa3_ind roa4_ind roa_post_ind roa2_post_ind roa3_post_ind roa4_post_ind;

data cst_matched2b; merge cst_matched2b ind_adj; by sich fyear;
  roa_adj = roa-roa_ind;
  roa2_adj = roa2-roa2_ind;
  roa3_adj = roa3-roa3_ind;
  roa4_adj = roa4-roa4_ind;
  roa_post_adj = roa_post-roa_post_ind;
  roa_post2_adj = roa2_post-roa2_post_ind;
  roa_post3_adj = roa3_post-roa3_post_ind;
  roa_post4_adj = roa4_post-roa4_post_ind;
  ret1y_adj = ret1y-ret1y_ind;
  ret2y_adj = ret2y-ret2y_ind;
  ret1y_f_adj = ret1y_f-ret1y_f_ind;
  ret2y_f_adj = ret2y_f-ret2y_f_ind;
run;


************************************************************************************************;
***********************************   Stock Liquidity  *****************************************;
************************************************************************************************;
proc sql;
  create table illiq as
  select a.permno, a.meetingdate, b.date as datem, b.ret, b.vol, b.prc, intnx('month', b.date, 0, 'end') as mend
  from cst_matched2b as a, crsp.dsf as b
  where a.permno = b.permno and 0 <= intck('month', b.date, a.meetingdate) <= 11;
quit;

proc sort data = illiq nodupkey; by permno meetingdate datem;

data illiq2; set illiq; by permno meetingdate;
  dvolume = vol*abs(prc)/1000000; * Dollar volume in million $s;
  if first.meetingdate then do;
    sum_pi = 0; sum_pi2 = 0; ni=0; sum_pl = 0; sum_pl2 = 0; nl=0; zr = 0;
  end;
  if dvolume > 0 then do;
    pi2 = abs(ret)/dvolume; pi = sqrt(pi2);
	sum_pi2+pi2; sum_pi+pi; ni+1;
  end;
  if abs(ret) >= 0 then do;
    if ret = 0 then do; 
	  zr+1;
      if year(date) <= 2000 then ret = .001;
	  else if year(date) > 2000 then ret = .0001;
	end;
    pl2 = dvolume/abs(ret); pl = sqrt(pl2);
	sum_pl2+pl2; sum_pl+pl; nl+1;
  end;
  if last.meetingdate then do; 
    illiq = sum_pi/ni; illiq2 = sum_pi2/ni; liq = sum_pl/nl; liq2 = sum_pl2/nl; zrindex = zr/nl; output;
  end;
  keep permno meetingdate illiq illiq2 liq liq2 zrindex; run;

proc sort data = illiq2; by descending illiq; run;

proc means data = illiq2 n mean median p1 p5 p10 p90 p95 p99 max; 
  var illiq2 illiq;

%winsor(dsetin=illiq2, dsetout=temp_illiq2, byvar=none, vars=illiq2 illiq, type=winsor, pctl=.5 99.5);
run;

proc sort data = temp_illiq2; by permno meetingdate;

***************************************************************************************************************;
*******************************************   Institutional Ownership  ****************************************;
***************************************************************************************************************;
proc sql;
  create table s34 as
  select a.permno, a.meetingdate, b.*
  from cst_matched2b as a left join tfn.s34(keep=mgrno rdate fdate cusip shrout1 shrout2 prc shares) as b
  on a.ncusip = b.cusip and intck('quarter', b.rdate, a.meetingdate)=1;
quit;

data s34; set s34;
  where mgrno > 0;
  shrout= shrout1 * 1000000; *shrout1 is in millions; 
  if shrout2 > 0 then shrout= shrout2 * 1000; *shrout2 is in thousands; 
  if shrout > 0 then wt = shares/shrout;
  rqdate = intnx('quarter', rdate, 0, 'end');
  format rqdate date9.;
  drop shrout1 shrout2;

proc sort data = s34; by permno meetingdate mgrno fdate;

data s34; set s34; by permno meetingdate mgrno fdate;
  if first.mgrno;

proc sort data = s34; by permno meetingdate;

proc means data = s34 noprint; by permno meetingdate;
  var wt;
  output out = temp_io (drop=_type_ _freq_) sum = io n = num_ii; 
run;


proc sort data = cst_matched2b; by permno meetingdate;

data cst_matched3; merge cst_matched2b (in=a) temp_illiq2; by permno meetingdate; if a;
  year = year(meetingdate);

proc sort data = cst_matched3; by year;

proc rank data = cst_matched3 out = temp.cst_matched groups = 3; by year;
  var illiq illiq2;
  ranks r_illiq r_illiq2;
run;
